1 Intoduction

Today we are going to work with microbiome data. In this recitation we are going to provide microbiome data as the result of the shotgun sequencing of the pig gut microbiome.

2 Pig gut microbiome data

The goal of this recitation is to replicate the following plot, which expresses the relationship between the Bacteroidetes and Firmicutes while the rest of the Phyla levels were assigned to others.

2.1 How many rows and columns does the data have?

library(tidyverse)
library(plotly)

pig_micro <- read_csv("data/Phyla_RelAbund_Final_Filtered_WithMetadata.csv")
dim(pig_micro)
## [1] 60 50

The data contains 60 rows and columns

2.2 How many phyla does the data contains and how many columns represents metadata of the experiment?

glimpse(pig_micro[seq(3), seq(8)])
## Rows: 3
## Columns: 8
## $ Sample_Name        <chr> "ShotgunWGS-ControlPig6GutMicrobiome-Day14", "Shotg…
## $ Pig                <dbl> 6, 8, 3
## $ Diet               <chr> "Control", "Control", "Control"
## $ Time_Point         <chr> "Day 14", "Day 0", "Day 14"
## $ Diet_By_Time_Point <chr> "Control Day 14", "Control Day 0", "Control Day 14"
## $ Acidobacteria      <dbl> 0.000741, 0.000885, 0.000690
## $ Actinobacteria     <dbl> 0.04813358, 0.06598073, 0.03267269
## $ Apicomplexa        <dbl> 5.95e-05, 9.14e-05, 4.92e-05

The first 5 rows represents metadata from Sample_Name to Diet_By_Time_Point while the columns from Acidbacteria to the last columns Unclassified derived from sequence.

2.3 Computing the cumulative abundance for other phylum

2.3.1 Create a new column with a new phyla assignation

Keep the phyla level when they are Firmicutes or Bacteroidetes, otherwise assign Phyla to “Other level”.

Hint: You may need to pivot the data to evaluate the column names as observations

pig_micro %>% 
  pivot_longer(cols = Acidobacteria:`unclassified (derived from other sequences)`,
              values_to = "Abundance", names_to = "Phyla") %>% 
  mutate(Phyla = case_when(Phyla == "Bacteroidetes" ~ Phyla,
                           Phyla == "Firmicutes" ~ Phyla,
                           TRUE ~ "Other phyla") ) %>% 
  head()
## # A tibble: 6 × 7
##   Sample_Name                            Pig Diet  Time_…¹ Diet_…² Phyla Abund…³
##   <chr>                                <dbl> <chr> <chr>   <chr>   <chr>   <dbl>
## 1 ShotgunWGS-ControlPig6GutMicrobiome…     6 Cont… Day 14  Contro… Othe… 7.41e-4
## 2 ShotgunWGS-ControlPig6GutMicrobiome…     6 Cont… Day 14  Contro… Othe… 4.81e-2
## 3 ShotgunWGS-ControlPig6GutMicrobiome…     6 Cont… Day 14  Contro… Othe… 5.95e-5
## 4 ShotgunWGS-ControlPig6GutMicrobiome…     6 Cont… Day 14  Contro… Othe… 5.03e-4
## 5 ShotgunWGS-ControlPig6GutMicrobiome…     6 Cont… Day 14  Contro… Othe… 3.84e-4
## 6 ShotgunWGS-ControlPig6GutMicrobiome…     6 Cont… Day 14  Contro… Othe… 2.71e-5
## # … with abbreviated variable names ¹​Time_Point, ²​Diet_By_Time_Point,
## #   ³​Abundance

2.3.2 Compute the cumulative abundance by the new Phyla levels that you created

pig_clean <- pig_micro %>% 
  pivot_longer(cols = Acidobacteria:`unclassified (derived from other sequences)`,
              values_to = "Abundance", names_to = "Phyla") %>% 
  mutate(Phyla = case_when(Phyla == "Bacteroidetes" ~ Phyla,
                           Phyla == "Firmicutes" ~ Phyla,
                           TRUE ~ "Other phyla") ) %>% 
  group_by(Sample_Name, Phyla, Time_Point, Pig) %>% 
  summarise(Abundance = sum(Abundance)) 

head(pig_clean)
## # A tibble: 6 × 5
## # Groups:   Sample_Name, Phyla, Time_Point [6]
##   Sample_Name                                Phyla         Time_…¹   Pig Abund…²
##   <chr>                                      <chr>         <chr>   <dbl>   <dbl>
## 1 ShotgunWGS-ControlPig10GutMicrobiome-Day0  Bacteroidetes Day 0      10  0.481 
## 2 ShotgunWGS-ControlPig10GutMicrobiome-Day0  Firmicutes    Day 0      10  0.435 
## 3 ShotgunWGS-ControlPig10GutMicrobiome-Day0  Other phyla   Day 0      10  0.0839
## 4 ShotgunWGS-ControlPig10GutMicrobiome-Day14 Bacteroidetes Day 14     10  0.383 
## 5 ShotgunWGS-ControlPig10GutMicrobiome-Day14 Firmicutes    Day 14     10  0.513 
## 6 ShotgunWGS-ControlPig10GutMicrobiome-Day14 Other phyla   Day 14     10  0.104 
## # … with abbreviated variable names ¹​Time_Point, ²​Abundance

Here is another way to do this.

pig_clean_alt <- pig_micro %>%
  mutate(Other_phyla = rowSums(select(.[6:ncol(.)], !contains(c("Bacteroidetes", "Firmicutes")))))

pig_clean_alt <- pig_clean_alt %>%
  select(Sample_Name, Pig, Diet, Time_Point, Diet_By_Time_Point, Bacteroidetes, Firmicutes, Other_phyla)

pig_clean_alt_tidy <- pig_clean_alt %>%
  pivot_longer(cols = Bacteroidetes:Other_phyla,
               names_to = "Phyla",
               values_to = "Abundance")

2.3.3 Create the barplot in ggplot

# what are the time points called again?
unique(pig_clean$Time_Point)
## [1] "Day 0"  "Day 14" "Day 7"
# make Time_Point a factor so the faceting goes Day 0, Day 7, Day 14
# instead of putting Day 14 in the middle
pig_clean$Time_Point <- as.factor(pig_clean$Time_Point)

# set levels to be the order we want
pig_clean$Time_Point <- factor(pig_clean$Time_Point,
                               levels = c("Day 0", "Day 7", "Day 14"))

stack_barplot <- pig_clean %>% 
  ggplot(aes(as.numeric(Pig), Abundance, fill = Phyla, 
             text = glue::glue("{Phyla}: {round(Abundance, 3)*100}%"))) + 
  geom_col() +
  scale_fill_brewer(palette = "GnBu") +
  facet_grid(cols = vars(Time_Point))+
  theme_classic()+
  labs(y= "Relative Abundance", 
       fill= "Phyla",
       x =  "Pig Number",
       title = "Bacteriodetes, Firmicutes, and Other Phyla from \nPig Microbiome Sequencing using Shotgun Metagenomics") +
  theme(panel.grid = element_blank(), 
        axis.text = element_text(color= "black", size = 11),
        strip.text = element_text(color = "black", size = 10), 
        strip.background = element_blank())

stack_barplot

3 Make the plot interactive

ggplotly(stack_barplot,
         tooltip = "text") %>%
  layout(margin = list(t = 100)) # for increasing the padding around the title